Set Up Steps

library(censusapi)
library(tidycensus)
library(tidyverse)
library(sf)
library(geojsonsf)
library(mapview)
library(dplyr)
library(plotly)
library(tigris)
library(readxl)
library(leaflet)
library(RColorBrewer)
library(sp)
library(usmap)

mapviewOptions(
  basemaps = "CartoDB.Positron",
  #vector.palette = colorRampPalette(cols)
)

options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="0c313bd613a7281ae62c2fbb004d156d647e9c94")
projection <- "+proj=utm +zone=10 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=ft +no_defs"
# Obtain the census variable lookup data
# acs_vars <-
#   listCensusMetadata(
#     name = "2018/acs/acs5",
#     type = "variables"
#   )

# Save an .rds version
# saveRDS(acs_vars, file = "/Users/armellecoutant/Documents/GitHub/218z/Armelle_Coutant/sj_outreach/censusData2018_acs_acs5.rds")

# Obtain the saved census data
acs_vars = readRDS("/Users/armellecoutant/Documents/GitHub/218z/Armelle_Coutant/sj_outreach/censusData2018_acs_acs5.rds")
# Get Santa Clara block groups
sc_blockgroups <-
  block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>% 
  st_transform('+proj=longlat +datum=WGS84')

# Get San Jose geometry
sj_geom <- places("CA", cb = F, progress_bar = FALSE) %>% 
  filter(NAME == "San Jose") %>% 
  st_transform('+proj=longlat +datum=WGS84')

# Filter to Santa Clara block groups within San Jose geometry
sj_blockgroups <- sc_blockgroups[st_contains(sj_geom, sc_blockgroups %>% st_centroid())[[1]],]

Vulnerability Mapping

These vulnerability maps are intended to determine the San Jose block groups with limited access to information, determined by lack of computers and lack of internet within a household; as well as the block groups in need of resources, determined by a measure of household median income.

Percentage of Households Without Internet Access

sc_internet <- 
  getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "group(B28002)"
  ) %>%
  mutate(
    blockgroup = paste0(state,county,tract,block_group)
  ) %>%
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
  dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(
    key = "variable", 
    value = "estimate", 
    -blockgroup) %>% 
  mutate(
    label = acs_vars$label[match(variable,acs_vars$name)]
    ) %>% 
  dplyr::select(-variable) %>%
  separate(label, into = c(NA, NA, "subscription", "type", "additional"), sep = "!!") %>% 
  filter(is.na(subscription) | subscription == "No Internet access") %>%
  mutate(
    subscription = replace_na(subscription, "total_num")
  ) %>%
  dplyr::select(blockgroup, estimate, subscription) %>%
  spread(key = subscription, value = estimate) %>%
  mutate(
    percent_no_internet = (`No Internet access`*100 / total_num) %>% round(1)
  )
sj_blockgroups_internet <- 
  sc_internet %>% 
  filter(blockgroup %in% sj_blockgroups$GEOID) %>% 
  left_join(sj_blockgroups, by = c("blockgroup" = "GEOID")) %>% 
  st_as_sf() %>%
  st_set_crs(4326) %>%
  st_transform(projection)

total_no_internet_sj = sum(sj_blockgroups_internet$`No Internet access`)
total_sj <- sum(sj_blockgroups_internet$total_num)
perc_no_internet_sj <- total_no_internet_sj*100/total_sj
print(perc_no_internet_sj)
## [1] 8.052816
sj_blockgroups_full <-
  block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>% 
  st_transform('+proj=longlat +datum=WGS84') %>% 
  filter(GEOID %in% sj_blockgroups$GEOID)

mapview(sj_blockgroups_internet %>% dplyr::select(percent_no_internet), layer.name = "Percentage of households without internet access")

Percentage of Households Without a Computer

sc_computer <- getCensus(
  name = "acs/acs5",
  vintage = 2018,
  region = "block group:*", 
  regionin = "state:06+county:085",
  vars = "group(B28003)"
  ) %>%
  mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>%
  dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  dplyr::select(-variable) %>%
  separate(label, into = c(NA, NA, "computer", "internet"), sep = "!!") %>% 
  filter(is.na(computer) | computer == "No computer") %>%
  mutate(computer = replace_na(computer, "total_num")) %>%
  dplyr::select(blockgroup, estimate, computer) %>%
  spread(key = computer, value = estimate) %>%
  mutate(percent_no_computer = (`No computer`*100 / total_num) %>% round(1))
sj_blockgroups_computer <- 
  sc_computer %>% 
  filter(blockgroup %in% sj_blockgroups$GEOID) %>% 
  left_join(sj_blockgroups, by = c("blockgroup" = "GEOID")) %>% 
  st_as_sf() %>%
  st_set_crs(4326) %>%
  st_transform(projection)

total_no_computer_sj = sum(sj_blockgroups_computer$`No computer`)
total_sj_comp <- sum(sj_blockgroups_computer$total_num)
perc_no_computer_sj <- total_no_computer_sj*100/total_sj_comp
print(perc_no_computer_sj)
## [1] 5.840027
mapview(sj_blockgroups_computer %>% dplyr::select(percent_no_computer), layer.name = "Percentage of households without a computer")

Household Income: Percent Households Below 80% AMI ($100,000)

Using area median income (AMI) is a more appropriate way to measure low-income status, as the cost of living is substantially higher than the federal average in San Jose.

sj_median_income_by_block <-
  getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "B19013_001E"
  ) %>%
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  rename(
    Median_Income = B19013_001E 
  ) %>% 
  filter(!is.na(Median_Income)) %>%
  left_join(sj_blockgroups, by = c("blockgroup" = "GEOID")) %>%
  na.omit()
sj_ami_by_block <-
  getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "group(B19001)"
  ) %>%
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
  group_by(blockgroup) %>% 
  summarize(
    Total = B19001_001E,
    `Under 75,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E),
    #sum(lapply(2:12, function(x) as.name(paste0("B19001_00",x,"E"))))
    `Under 100,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E)
  ) %>% 
  mutate(
    `% under 75,000` = `Under 75,000` / Total * 100,
    `% under 100,000` = `Under 100,000` / Total * 100
  ) %>% 
  left_join(sj_median_income_by_block %>% dplyr::select(-Median_Income)
  )
sj_ami_by_block <-
  sj_ami_by_block %>%
  st_as_sf()

mapview(sj_ami_by_block, zcol = "% under 100,000")

Combined Vulnerability Map

# m1 <- mapview(sj_blockgroups_internet %>% dplyr::select(percent_no_internet), layer.name = "% Households Without Internet")
# 
# m2 <- mapview(sj_blockgroups_computer %>% dplyr::select(percent_no_computer), layer.name = "% Households Without Computer")
# 
# m3 <- mapview(sj_ami_by_block, zcol = "% under 100,000", layer.name = "% Households Under 80% AMI")
# 
# if (interactive()) {
# library(plainview)
# m1 + m2
# }
# 
# mapview(sj_blockgroups_internet %>% dplyr::select(percent_no_internet), layer.name = "Percentage of households without internet access") + mapview(sj_blockgroups_computer %>% dplyr::select(percent_no_computer), layer.name = "Percentage of households without a computer")

Outreach

This segment explores different outreach options to target the most vulnerable block groups identified, starting with exploring the language breakdown of different block groups. The first outreach option is placing informational posters in highly visited essential businesses. The second outreach option is placing the posters on street poles. The final outreach option is placing the posters on bus routes.

Language

sj_lang_by_block <-
  getCensus(
    name = "acs/acs5",
    vintage = 2018,
    region = "block group:*", 
    regionin = "state:06+county:085",
    vars = "group(B16004)"
  )  %>% 
  mutate(
    blockgroup =
      paste0(state,county,tract,block_group)
  ) %>% 
  select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME")) %>% 
  select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(
    key = "variable",
    value = "estimate", 
    - blockgroup
  ) %>% 
  left_join(acs_vars, by = c("variable" = "name")) %>% 
  mutate(
    tier = substr(label,lapply(label, function(x) max(unlist(gregexpr('!!',x)))+2),nchar(label))
  ) %>% 
  filter(tier %in% c('Speak English "not well"', 
                     'Speak English "not at all"', 
                     'Total', 'Speak Spanish', 
                     'Speak Asian and Pacific Island languages')) %>% 
  group_by(blockgroup, tier) %>% 
  summarise(
    estimate1 = sum(estimate)
  ) %>% 
  spread(
    key = "tier",
    value = "estimate1"
  ) %>% 
  mutate(
    `% speaking english < well` = (`Speak English "not well"` + `Speak English "not at all"`) / Total * 100,
    `% speaking spanish` = (`Speak Spanish`/ Total) * 100,
    `% speaking api` = (`Speak Asian and Pacific Island languages` / Total) * 100
  ) %>% 
  left_join(sj_median_income_by_block %>% dplyr::select(-Median_Income)) %>%
  mutate(
    log_perc = log(`% speaking english < well`)
  ) %>% 
  filter(log_perc > 0) %>%
  st_as_sf()

mapview(sj_lang_by_block)

Example Supermarket Outreach

A block with high % of no internet access:

example <-
  sj_blockgroups_internet[29,]
mapview(example)
# This creates file paths for the SafeGraph folder and COVID-19 GitHub folder.

sg_path <- "/Users/armellecoutant/pCloud Drive/SFBI/Restricted Data Library/Safegraph/" 

github_path  <- "/Users/armellecoutant/Documents/GitHub/covid19/"

# This sets URLs for Point of Interest (POI) location data.

poi_url_Mar2020 <-
  paste0(sg_path, "/core/2020/03/CoreRecords-CORE_POI-2019_03-2020-03-25/ca_poi_Mar20.rds")

poi_url_Apr2020 <-
  paste0(sg_path, "/core/2020/04/CoreApr2020Release-CORE_POI-2020_03-2020-04-07/ca_poi_Apr20.rds")

poi_url_2019 <-
  paste0(sg_path,'covid19analysis/ca_poi.rds')

Can be associated with grocery store locations to determine which grocery stores to target for outreach.

poi_caApr20 <- readRDS (poi_url_Apr2020)

poi_sj <-
  poi_caApr20 %>%
  filter(city == "San Jose") %>%
  filter(naics_code == "445110")

coordinates(poi_sj) <- c("longitude", "latitude")
proj4string(poi_sj) <- CRS("+init=epsg:4326")

mapview(poi_sj)

Example Bus Outreach

# shapes <-
# read_csv("/Users/armellecoutant/Documents/GitHub/218z/Armelle_Coutant/digital_inclusion/gtfs_vta/shapes.txt")

This script includes code adapted from the following sources: - simone_nfo_internet.Rmd - Dem_analysis_plotly.Rmd